package cn.edu.suda.ada.spider;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
import java.util.Scanner;

import org.apache.http.client.ClientProtocolException;

import cn.edu.suda.ada.fetcher.PageFetcher;
import cn.edu.suda.ada.model.FetchedPage;
import cn.edu.suda.ada.model.SpiderParams;
import cn.edu.suda.ada.parser.GanJiUrlParser;
import cn.edu.suda.ada.queue.StaticUrlQueue;
import cn.edu.suda.ada.queue.StaticVisitedUrlQueue;
import cn.edu.suda.ada.queue.UrlQueue;
import cn.edu.suda.ada.queue.VisitedUrlQueue;
import cn.edu.suda.ada.worker.GanJiWorker;

/**
 * Entry point for crawling rental listings from ganji.com for a single city.
 *
 * <p>The class keeps its seed queue in a static field and also resets the
 * static {@code StaticUrlQueue} / {@code StaticVisitedUrlQueue}, so every call
 * to {@link #spiderGanJi(String, int)} discards whatever an earlier call had
 * queued.
 * NOTE(review): overlapping calls therefore look unsafe; confirm that callers
 * run one crawl at a time.
 */
public class SpiderGanJi {
	/** Community path segments whose rental-listing pages are seeded, in enqueue order. */
	private static final String[] COMMUNITY_SLUGS = {
		"huashijiaoshixincun",
		"huashigaojiaoxincun"
	};

	/**
	 * Largest numeric suffix appended after {@code /f} in a seed URL.
	 * Presumably a listing offset; confirm against the site's URL scheme.
	 */
	private static final int MAX_PAGE_SUFFIX = 20;

	/** Increment between consecutive {@code /f} suffixes. */
	private static final int PAGE_SUFFIX_STEP = 20;

	/** First-level (listing page) URLs handed to the workers. */
	private static final UrlQueue urlQueueLevelOne = new UrlQueue();

	/** Not referenced anywhere in this class; retained so class initialisation is unchanged. */
	private static UrlQueue urlQueueLevelTwo = new UrlQueue();

	/**
	 * Seeds the first-level queue with the listing-page URLs of every community
	 * in {@link #COMMUNITY_SLUGS} and prints the resulting queue to standard output.
	 *
	 * <p>For each community, one URL is added per suffix from 0 up to
	 * {@link #MAX_PAGE_SUFFIX} in steps of {@link #PAGE_SUFFIX_STEP}. URLs are
	 * added community by community, so all pages of the first community precede
	 * those of the second.
	 *
	 * <p>Second-level URL discovery through {@code GanJiUrlParser} is not
	 * performed here; this method only builds URLs from the fixed pattern.
	 *
	 * @param city sub-domain placed in front of {@code .ganji.com}; used verbatim
	 * @throws ClientProtocolException declared for callers; nothing in this method
	 *         visibly raises it
	 * @throws IOException declared for callers; nothing in this method visibly raises it
	 */
	private static void initializeQueueGanJi(String city) throws ClientProtocolException, IOException{
		for (String community : COMMUNITY_SLUGS) {
			String listingPrefix = "http://"+city+".ganji.com/xiaoqu/" + community + "/chuzufang/f";
			for (int suffix = 0; suffix <= MAX_PAGE_SUFFIX; suffix += PAGE_SUFFIX_STEP) {
				urlQueueLevelOne.addElement(listingPrefix + suffix);
			}
		}
		System.out.println(urlQueueLevelOne.getUrlQueue());
	}

	/**
	 * Resets the crawl queues, seeds the first-level queue for {@code city} and
	 * starts the worker threads.
	 *
	 * <p>{@code SpiderParams.WORKER_NUM} threads are started, each running a
	 * {@code GanJiWorker} whose id runs from {@code theadId} to
	 * {@code theadId + SpiderParams.WORKER_NUM - 1} inclusive. The method returns
	 * as soon as the threads have been started; it does not wait for them.
	 *
	 * @param city sub-domain placed in front of {@code .ganji.com}
	 * @param theadId id given to the first worker; later workers get consecutive ids
	 * @throws ClientProtocolException propagated from queue initialisation
	 * @throws IOException propagated from queue initialisation
	 */
	public static void spiderGanJi(String city,int theadId) throws ClientProtocolException, IOException{
		// Start from a clean slate: drop seeds and visited/pending URLs of any earlier run.
		urlQueueLevelOne.clear();
		StaticUrlQueue.clearQueue();
		StaticVisitedUrlQueue.clearQueue();

		initializeQueueGanJi(city);

		// Workers read the seed URLs through this shared static reference.
		GanJiWorker.setUrlQueueLevelOne(urlQueueLevelOne);

		int lastWorkerId = SpiderParams.WORKER_NUM + theadId - 1;
		for (int workerId = theadId; workerId <= lastWorkerId; workerId++) {
			new Thread(new GanJiWorker(workerId, city)).start();
		}
	}
}
